#!/bin/bash
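# Launch mDPO post-training for the NAMO SFT checkpoint on MMPR-v1.1
# preference data, via accelerate with DeepSpeed ZeRO-2 in bf16.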

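# Optional first CLI argument (defaults to "both").
# NOTE: TRAIN_ID is not referenced anywhere below in this script.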
TRAIN_ID=${1:-"both"}

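# Model / experiment configuration.
# NOTE: LLM_MODEL_PATH and VE_MODEL_PATH are not passed to train_mdpo.py below;
# presumably both are already baked into the SFT checkpoint loaded via
# --pretrain_model_path.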
PROMPT_VERSION=qwen
LLM_MODEL_PATH="checkpoints/Qwen2.5-0.5B-Instruct"
VE_MODEL_PATH="checkpoints/siglip2-1764-trained"
PROJ_TYPE=glu
NUM_PATCHES=1764
EXP_NAME="500m-siglip2-trained-$NUM_PATCHES"

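# Effective batch size = per_device_train_batch_size (2)
#   x gradient_accumulation_steps (10) x num_processes.
# Evaluation is disabled (--eval_strategy "no"); a checkpoint is saved every
# 100 steps and only the most recent one is kept (--save_total_limit 1).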
accelerate launch --main_process_port 45677 train_mdpo.py \
    --deepspeed ./trains/zero2.json \
    --pretrain_model_path "./checkpoints/namo-$EXP_NAME-$PROJ_TYPE-sft-final-whole/" \
    --version "$PROMPT_VERSION" \
    --data_path data/posttraining/MMPR-v1.1/meta.json \
    --image_folder ./data/images_all \
    --conn_ve_llm_type "$PROJ_TYPE" \
    --tune_conn_ve_llm True \
    --dynamic_size True \
    --native_size_batched True \
    --num_patches "$NUM_PATCHES" \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --output_dir "./checkpoints/namo-$EXP_NAME-mdpo-$PROJ_TYPE" \
    --num_train_epochs 1 \
    --per_device_train_batch_size 2 \
    --auto_find_batch_size True \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 10 \
    --eval_strategy "no" \
    --save_strategy "steps" \
    --save_steps 100 \
    --save_total_limit 1 \
    --learning_rate 6e-5 \
    --weight_decay 0.0 \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 False \
    --fp16 False \
    --bf16 True \
    --model_max_length 2480 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True
